library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(stringr)
# Load the data sets used throughout the analysis.
# Read the raw CSV inputs.
# The three NCHS files go through base read.csv(), whose default name checking
# turns headers into syntactic names (e.g. `Cause.Name`, used by the filters
# further down). The country-level file is read with readr::read_csv().
drugs_data <- read.csv("Drug_overdose_death_rates__by_drug_type__sex__age__race__and_Hispanic_origin__United_States.csv")
death_rates <- read.csv("Death_rates_for_suicide__by_sex__race__Hispanic_origin__and_age__United_States.csv")
leading_causes_d <- read.csv("NCHS_-_Leading_Causes_of_Death__United_States.csv")
suicide_data <- read_csv("suicide-rate-by-country-2024.csv")
## Rows: 184 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (3): suicideRateByCountry_rate2019both, suicideRateByCountry_rate2019mal...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# California suicide indicator data; averaged per `Geography` further down.
california_data <- read.csv("suicide-lghc-indicator-21.csv")
# --- Disabled: ranked HTML table of 2019 suicide rates by country -----------
# NOTE(review): if this is re-enabled, `col.names` below lists "Rank" first,
# but the pipeline produces the columns in the order country, rate, Rank
# (mutate() appends Rank last), so the headers would be mislabelled.
# suicide_data <- read_csv("suicide-rate-by-country-2024.csv")
#
#
# country_rate <- suicide_data %>%
# select(country, Combined_Suicide_Rate = suicideRateByCountry_rate2019both) %>%
# arrange(desc(Combined_Suicide_Rate)) %>%
# mutate(Rank = row_number()) # Add a ranking column after arranging
#
# table_output <- kable(country_rate, format = "html", col.names = c("Rank", "Country", "Suicide Rate (2019)"),
# caption = "Ranked Suicide Rates by Country (2019)") %>%
# kable_styling(full_width = FALSE, position = "center") %>%
# scroll_box(width = "100%", height = "500px")
#
# # Output the table
# print(table_output)
# Compare yearly death counts for suicide, heart disease and cancer.
#
# `State` contains a "United States" entry in addition to the individual
# states (the state-level chart in this script filters it out). That entry
# appears to be the national aggregate, so summing it together with the
# per-state rows would count every death twice. The yearly totals are
# therefore built from the per-state rows only.
suicides <- filter(leading_causes_d, Cause.Name == "Suicide")
heart_disease <- filter(leading_causes_d, Cause.Name == "Heart disease")
cancer <- filter(leading_causes_d, Cause.Name == "Cancer")

suicide_stats <- suicides %>%
  filter(State != "United States") %>%
  group_by(Year) %>%
  summarize(Total_Deaths = sum(Deaths))

heart_disease_stats <- heart_disease %>%
  filter(State != "United States") %>%
  group_by(Year) %>%
  summarize(Total_Deaths = sum(Deaths))

cancer_stats <- cancer %>%
  filter(State != "United States") %>%
  group_by(Year) %>%
  summarize(Total_Deaths = sum(Deaths))

# One line per cause; the constant `color` strings become the legend entries.
ggplot() +
  geom_line(data = suicide_stats, aes(x = Year, y = Total_Deaths, color = "Suicide")) +
  geom_line(data = heart_disease_stats, aes(x = Year, y = Total_Deaths, color = "Heart Disease")) +
  geom_line(data = cancer_stats, aes(x = Year, y = Total_Deaths, color = "Cancer")) +
  labs(
    title = "Death Trends by Cause",
    x = "Year",
    y = "Total Deaths",
    color = "Cause" # without this the legend title defaults to "colour"
  ) +
  theme_bw()
# Keep only the suicide rows of the leading-causes data.
# NOTE: this reassigns `suicide_data`, replacing the country-level table that
# was read from "suicide-rate-by-country-2024.csv" earlier in the script.
suicide_data <- leading_causes_d[leading_causes_d$Cause.Name == "Suicide", ]

# Sum deaths by year over the individual states only. `State` also contains a
# "United States" entry (apparently the national aggregate); including it
# would double the yearly totals.
suicide_trend <- aggregate(
  Deaths ~ Year,
  data = subset(suicide_data, State != "United States"),
  FUN = sum
)

# Line chart of the yearly totals, with a marker on every year.
ggplot(suicide_trend, aes(x = Year, y = Deaths)) +
  geom_line(group = 1, colour = "blue") +
  geom_point(colour = "red") +
  labs(
    title = "Suicides Over the Years in the United States",
    x = "Year",
    y = "Number of Suicides"
  ) +
  theme_minimal()
# Total suicides per state over all years, largest first. The "United States"
# entry is excluded so that only individual states are ranked.
state_suicides <- leading_causes_d %>%
  filter(Cause.Name == "Suicide", State != "United States") %>%
  group_by(State) %>%
  summarise(Total_Suicides = sum(Deaths)) %>%
  arrange(desc(Total_Suicides))

# Horizontal bar chart, one bar per state, ordered by total.
# labs() labels follow the aesthetics, not the screen position: `x` is the
# State aesthetic (drawn vertically after coord_flip()) and `y` is the count.
ggplot(state_suicides, aes(x = reorder(State, Total_Suicides), y = Total_Suicides)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Total Suicides by State",
    x = "State",
    y = "Total Suicides"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 5), # small text so all state names fit
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )
# Mean suicide rate for each Geography in the California data (missing rates
# ignored), sorted from the highest mean to the lowest.
rates_by_geography <- group_by(california_data, Geography)
geography_means <- summarise(
  rates_by_geography,
  Average_Rate = mean(Rate, na.rm = TRUE)
)
average_rates <- arrange(geography_means, desc(Average_Rate))

# Bar chart of those means; bars run from highest to lowest and the x labels
# are rotated to vertical so they do not overlap.
vertical_x_labels <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplot(
  average_rates,
  aes(x = reorder(Geography, -Average_Rate), y = Average_Rate)
) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Average Suicide Rates by City in California",
    x = "City",
    y = "Average Rate per 100,000 People"
  ) +
  theme_minimal() +
  vertical_x_labels
# Read the overdose data with readr, rename PANEL to DrugType, then derive a
# shortened drug-type label and a gender flag taken from STUB_LABEL.
synthetic_opioid_panel <- "Drug overdose deaths involving other synthetic opioids (other than methadone)"
any_opioid_panel <- "Drug overdose deaths involving any opioid"

raw_overdoses <- read_csv("Drug_overdose_death_rates__by_drug_type__sex__age__race__and_Hispanic_origin__United_States.csv")
renamed_overdoses <- rename(raw_overdoses, DrugType = PANEL)

drugs_data <- mutate(
  renamed_overdoses,
  # Only the two opioid panels get a short label; all others keep their name.
  SimpleDrugType = case_when(
    DrugType == synthetic_opioid_panel ~ "Synthetic Opioids (Excl. Methadone)",
    DrugType == any_opioid_panel ~ "Any Opioid",
    TRUE ~ DrugType
  ),
  # Labels carrying neither prefix fall through to "Unspecified".
  Gender = case_when(
    str_detect(STUB_LABEL, "Male:") ~ "Male",
    str_detect(STUB_LABEL, "Female:") ~ "Female",
    TRUE ~ "Unspecified"
  )
)
## Rows: 6228 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): INDICATOR, PANEL, UNIT, STUB_NAME, STUB_LABEL, AGE, FLAG
## dbl (8): PANEL_NUM, UNIT_NUM, STUB_NAME_NUM, STUB_LABEL_NUM, YEAR, YEAR_NUM,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Find, for each of the two opioid categories, the year with the highest summed
# ESTIMATE, then keep the rows of `drugs_data` belonging to those peaks.
total_by_year <- drugs_data %>%
  filter(SimpleDrugType %in% c("Synthetic Opioids (Excl. Methadone)", "Any Opioid")) %>%
  group_by(SimpleDrugType, YEAR) %>%
  summarise(Total_Estimate = sum(ESTIMATE, na.rm = TRUE), .groups = "drop")

# One row per drug type (more if several years tie for the maximum).
peak_years <- total_by_year %>%
  group_by(SimpleDrugType) %>%
  slice_max(Total_Estimate, n = 1) %>%
  ungroup()

# Match on the (drug type, year) PAIR. Two independent `%in%` tests would also
# keep drug type A in drug type B's peak year whenever the peaks differ.
peak_data_analysis <- drugs_data %>%
  semi_join(peak_years, by = c("SimpleDrugType", "YEAR"))
# Tag every peak-year row with a race category taken from STUB_LABEL. The first
# matching pattern wins; labels matching none of them become "Other".
peak_data_analysis <- peak_data_analysis %>%
  mutate(Race = case_when(
    str_detect(STUB_LABEL, "White") ~ "White",
    str_detect(STUB_LABEL, "Black or African American") ~ "Black or African American",
    str_detect(STUB_LABEL, "American Indian or Alaska Native") ~ "American Indian or Alaska Native",
    str_detect(STUB_LABEL, "Asian or Pacific Islander") ~ "Asian or Pacific Islander",
    str_detect(STUB_LABEL, "Hispanic or Latino") ~ "Hispanic or Latino",
    TRUE ~ "Other"
  ))

# Sum the estimates per drug type and race before plotting. Feeding the raw
# rows to a dodged identity bar draws all rows of one race on top of each
# other, so the bar would show the largest single value, not the total the
# y axis promises. Summing with na.rm = TRUE also drops the missing estimates
# that previously triggered a "removed rows" warning.
race_totals <- peak_data_analysis %>%
  group_by(SimpleDrugType, Race) %>%
  summarise(Total_Estimate = sum(ESTIMATE, na.rm = TRUE), .groups = "drop")

ggplot(race_totals, aes(x = Race, y = Total_Estimate, fill = Race)) +
  geom_col() +
  facet_wrap(~SimpleDrugType, scales = "free_x") +
  labs(
    title = "Drug Overdose Deaths by Race in Peak Years",
    x = "Race",
    y = "Total Estimates",
    fill = "Race"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Sum ESTIMATE (ignoring missing values) per drug type, gender and year for the
# rows of `data` whose Race equals `target_race`.
sum_estimates_by_gender <- function(data, target_race) {
  race_rows <- filter(data, Race == target_race)
  grouped_rows <- group_by(race_rows, SimpleDrugType, Gender, YEAR)
  summarise(
    grouped_rows,
    Total_Deaths = sum(ESTIMATE, na.rm = TRUE),
    .groups = "drop"
  )
}

# Per-gender totals for the White and the Black or African American rows.
white_data_by_gender <- sum_estimates_by_gender(peak_data_analysis, "White")
black_data_by_gender <- sum_estimates_by_gender(
  peak_data_analysis,
  "Black or African American"
)

# Overall total per drug type, across every group and year in the peak data.
total_deaths_by_drug <- summarise(
  group_by(peak_data_analysis, SimpleDrugType),
  Total_Deaths = sum(ESTIMATE, na.rm = TRUE),
  .groups = "drop"
)
# Dodged bar chart of Total_Deaths per YEAR, filled by Gender, with one facet
# per drug type. `data` needs YEAR, Total_Deaths, Gender and SimpleDrugType.
plot_deaths_by_gender <- function(data, plot_title) {
  ggplot(data, aes(x = YEAR, y = Total_Deaths, fill = Gender)) +
    geom_bar(stat = "identity", position = position_dodge()) +
    facet_wrap(~SimpleDrugType) +
    labs(
      title = plot_title,
      x = "Year",
      y = "Total Deaths",
      fill = "Gender"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# White rows
plot_deaths_by_gender(
  white_data_by_gender,
  "Total Deaths by Year for White Race by Gender"
)

# Black or African American rows
plot_deaths_by_gender(
  black_data_by_gender,
  "Total Deaths by Year for the African American Race by Gender"
)

# Overall totals, one bar per drug type
drug_type_aes <- aes(x = SimpleDrugType, y = Total_Deaths, fill = SimpleDrugType)
ggplot(total_deaths_by_drug, drug_type_aes) +
  geom_bar(stat = "identity") +
  labs(
    title = "Total Deaths Across Both Groups by Drug Type",
    x = "Drug Type",
    y = "Total Deaths",
    fill = "Drug Type"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Read the overdose file again; this replaces the `drugs_data` prepared above.
drugs_data <- read_csv("Drug_overdose_death_rates__by_drug_type__sex__age__race__and_Hispanic_origin__United_States.csv")
## Rows: 6228 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): INDICATOR, PANEL, UNIT, STUB_NAME, STUB_LABEL, AGE, FLAG
## dbl (8): PANEL_NUM, UNIT_NUM, STUB_NAME_NUM, STUB_LABEL_NUM, YEAR, YEAR_NUM,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# PANEL becomes DrugType. The two opioid panels get short labels and every
# other panel is lumped into "Other Types". Gender is only set for labels that
# contain "Male: White" or "Female: White".
synthetic_panel_name <- "Drug overdose deaths involving other synthetic opioids (other than methadone)"
any_opioid_panel_name <- "Drug overdose deaths involving any opioid"

drugs_data <- rename(drugs_data, DrugType = PANEL)
drugs_data <- mutate(
  drugs_data,
  SimpleDrugType = case_when(
    DrugType == synthetic_panel_name ~ "Synth. Opioids (excl. Methadone)",
    DrugType == any_opioid_panel_name ~ "Any Opioid",
    TRUE ~ "Other Types"
  ),
  Gender = case_when(
    str_detect(STUB_LABEL, "Male: White") ~ "Male",
    str_detect(STUB_LABEL, "Female: White") ~ "Female",
    TRUE ~ "Unspecified"
  )
)

# Narrow down step by step: the two opioid categories, then labels mentioning
# "White", then rows with a known gender.
opioid_rows <- filter(
  drugs_data,
  SimpleDrugType %in% c("Synth. Opioids (excl. Methadone)", "Any Opioid")
)
white_opioid_rows <- filter(opioid_rows, str_detect(STUB_LABEL, "White"))
filtered_data <- filter(white_opioid_rows, Gender %in% c("Male", "Female"))
# Density of ESTIMATE for White males and females, overlaid in one panel per
# drug type. The plot is stored in a variable so it can be printed explicitly.
density_plot <- ggplot(filtered_data, aes(x = ESTIMATE, fill = Gender)) +
  geom_density(alpha = 0.5) + # semi-transparent so both genders stay visible
  facet_wrap(~SimpleDrugType) +
  labs(
    title = "Combined Density of Drug Overdose Deaths Over Years by Gender, White",
    x = "Total Deaths Estimate",
    y = "Density"
  ) +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
  theme_minimal() +
  theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    strip.text.x = element_text(size = 12),
    legend.position = "top"
  )

# Print the plot object. `print(ggplot)` would print the source of the ggplot()
# function instead of drawing anything.
print(density_plot)
# Rows broken down by age only, without the "All ages" aggregate.
age_specific_data <- filter(
  death_rates,
  STUB_NAME == "Age" & AGE != "All ages"
)

# Build the static chart layer by layer: one coloured line (with points) per
# age group over the years.
p <- ggplot(age_specific_data, aes(x = YEAR, y = ESTIMATE, color = AGE))
p <- p + geom_line() + geom_point()
p <- p + labs(
  title = "Interactive Plot of Suicide Rates by Age Group",
  x = "Year",
  y = "Suicide Rate per 100,000 Population",
  color = "Age Group"
)

# Hand the ggplot object to plotly to get the interactive version.
ggplotly(p)
# Average suicide rate per age group, sorted from highest to lowest.
# Only the age-only breakdown (STUB_NAME == "Age") is used and the "All ages"
# aggregate is dropped, so every remaining AGE value is an actual age group
# and is not mixed with rows from other breakdowns.
average_age_rates <- death_rates %>%
  filter(STUB_NAME == "Age", AGE != "All ages") %>%
  group_by(AGE) %>%
  summarise(AverageRate = mean(ESTIMATE, na.rm = TRUE)) %>%
  arrange(desc(AverageRate))

# Extract the 5 age groups with the highest average suicide rates
top_groups <- head(average_age_rates, 5)

# reorder() keeps the bars in descending order of rate; a plain `x = AGE`
# would sort them alphabetically by label.
ggplot(top_groups, aes(x = reorder(AGE, -AverageRate), y = AverageRate, fill = AGE)) +
  geom_col() + # bar heights are the pre-calculated averages
  labs(
    title = "Top 5 Age Groups with the Highest Average Suicide Rates",
    x = "Age Group",
    y = "Average Suicide Rate"
  ) +
  theme_minimal() +
  theme(legend.position = "none")